// www.gusucode.com > VC++写的C编译器源代码附设计文档-源码程序 > VC++写的C编译器源代码附设计文档-源码程序/code/C- Compiler/Tokenizer.cpp

    //Download by http://www.NewXing.com
/*
 * This file is written by 陆晓春(robert1111@zju.edu.cn)
 *                                       Copyright, 2004
 */

// Tokenizer.cpp : implementation file
//

#include "stdafx.h"
#include "cminus.h"
#include "tokenizer.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/*  *    CTokenizer
    *    Construction & destruction
  *	* *
   ***   Programmer: 陆晓春
    *    Date:		2004.05.17             */

// Creates a tokenizer that scans a private copy of `string`.
//
// Defaults: end-of-line tokens are reported, both "//" and "/* */" comments
// are recognised, and identifiers keep their case.  Line numbering starts at 1.
//
// string - the complete source text to tokenize; it is copied, so the
//          caller's object is not modified.
CTokenizer::CTokenizer(CString &string)
{
	m_bPushedBack = FALSE;
	m_bEolIsSignificant = TRUE;
	m_bSlSlComments = TRUE;
	m_bSlStComments = TRUE;
	m_bForceLower = FALSE;	// case sensitive
	m_iLineNo = 1;			// the first line
	m_iChar = 0;

	// Prime the look-ahead with a blank so the first NextToken() call starts
	// by skipping it and fetching the first real character.
	m_peekc = ' ';

	// Give the token state a defined value so that PushBack(), GetStrValue()
	// or the numeric getters never read uninitialised members when they are
	// called before the first NextToken().  TT_EOF is only a placeholder here.
	m_tType = TT_EOF;
	m_dVal = 0;

	// Terminate the private copy with an EOF sentinel character; NextToken()
	// compares the characters it reads against EOF to detect the end.
	m_sString = string;
	m_sString += (TCHAR)EOF;
}

// Nothing to release explicitly: the destructor body is intentionally empty.
CTokenizer::~CTokenizer() { }

/*  *    CTokenizer
    *    public functions
  *	* *
   ***   Programmer: 陆晓春
    *    Date:		2004.05.17             */

void CTokenizer::PushBack()
{
	m_bPushedBack = TRUE;
}

// Chooses whether line breaks are reported as TT_EOL tokens (bFlag != FALSE)
// or silently skipped like other white space (bFlag == FALSE).
void CTokenizer::EolIsSignificant(BOOL bFlag)
{
	this->m_bEolIsSignificant = bFlag;
}

// Turns recognition of C style "/* ... */" comments on or off.
void CTokenizer::SlStComments(BOOL bFlag)
{
	this->m_bSlStComments = bFlag;
}

// Turns recognition of C++ style "// ..." comments on or off.
void CTokenizer::SlSlComments(BOOL bFlag)
{
	this->m_bSlSlComments = bFlag;
}

// When bFlag is set, every TT_WORD token is converted to lower case before
// it is returned, which makes word tokens case insensitive.
void CTokenizer::LowerCaseMode(BOOL bFlag)
{
	this->m_bForceLower = bFlag;
}

// Scans the next token and returns its type; the type is also kept in
// m_tType so that PushBack() can hand the same token out again.
//
// Token types produced:
//   TT_EOF      the EOF sentinel was reached
//   TT_EOL      a line break ("\r", "\n" or "\r\n"); only when
//               m_bEolIsSignificant is set, otherwise line breaks are skipped
//   TT_INTEGER  digits without a fractional part; value in m_dVal
//   TT_REAL     digits with at least one digit after '.'; value in m_dVal
//   TT_WORD     a letter or '_' followed by letters, digits or '_';
//               text in m_sVal (lower-cased when m_bForceLower is set)
//   '\'' / '"'  a quoted literal: the token type is the quote character
//               itself and m_sVal holds the text with escapes decoded
//   otherwise   the character itself (operators, punctuation, ...)
//
// A '-' immediately followed by a digit or a '.' is treated as the sign of a
// number.  Comments are skipped according to m_bSlSlComments/m_bSlStComments.
//
// m_peekc carries the one-character look-ahead between calls.  Every path
// that hits EOF stores EOF in m_peekc, so a further call returns TT_EOF
// immediately instead of reading beyond the sentinel.
//
// NOTE(review): the <ctype> classifiers are called with the raw value from
// GetChar(); this assumes GetChar() never yields a negative value other than
// EOF - confirm against its implementation.
int CTokenizer::NextToken()
{
	// A pushed-back token is simply delivered a second time.
	if( m_bPushedBack ) {
		m_bPushedBack = FALSE;
		return m_tType;
	}

	int c = m_peekc;
	m_sVal = _T("");

	if( c == EOF ) return m_tType = TT_EOF;

	// ---- white space and line breaks -------------------------------------
	while( ::isspace( c ) ) {
		if( c == '\r' ) {
			// "\r" and "\r\n" both count as a single line break.
			m_iLineNo++;
			c = GetChar();
			if( c == '\n' ) c = GetChar();
			if( m_bEolIsSignificant ) {
				m_peekc = c;
				return m_tType = TT_EOL;
			}
		} else {
			if( c == '\n' ) {
				m_iLineNo++;
				if( m_bEolIsSignificant ) {
					m_peekc = ' ';
					return m_tType = TT_EOL;
				}
			}
			c = GetChar();
		}

		if( c == EOF ) {
			m_peekc = EOF;
			return m_tType = TT_EOF;
		}
	}

	// ---- numbers ----------------------------------------------------------
	if( ::isdigit( c ) || c == '.' || c == '-' ) {
		BOOL neg = FALSE;
		if( c == '-' ) {
			c = GetChar();
			if( c != '.' && !::isdigit( c ) ) {
				// A minus that does not start a number is an ordinary token.
				m_peekc = c;
				return m_tType = '-';
			}
			neg = TRUE;
		}
		double v = 0;
		int decexp = 0;		// number of digits seen after the decimal point
		int seendot = 0;	// 1 once a '.' has been consumed
		int digits = 0;		// total number of digits consumed
		while( true ) {
			if( c == '.' && seendot == 0 )
				seendot = 1;
			else if( ::isdigit( c ) ) {
				v = v * 10 + (c - '0');
				decexp += seendot;
				digits++;
			} else
				break;
			c = GetChar();
		}
		m_peekc = c;

		if( digits == 0 ) {
			// Only "." or "-." was read: there is no number here.  Returning
			// a numeric token and pushing the '.' back would make the next
			// call see the same '.' again and never advance.
			if( neg ) {
				// Deliver the '-' now and make '.' the next character.
				// Stepping m_iChar back un-reads the look-ahead character
				// (assumes GetChar() advances m_iChar by one per call -
				// TODO confirm against GetChar()).
				m_iChar--;
				m_peekc = '.';
				return m_tType = '-';
			}
			return m_tType = '.';
		}

		if( decexp != 0 ) {
			// Scale by 10^decexp to place the decimal point.
			double denom = 10;
			decexp--;
			while( decexp > 0 ) {
				denom *= 10;
				decexp--;
			}
			v = v / denom;
		} else if( seendot == 1 ) {
			// "12." - the dot is not part of the number: un-read the
			// look-ahead character and make '.' the next character.
			m_iChar--;
			m_peekc = '.';
			seendot = 0;
		}
		m_dVal = neg ? -v : v;
		if( seendot == 0 )
			return m_tType = TT_INTEGER;
		else
			return m_tType = TT_REAL;
	}

	// ---- words (identifiers / keywords) ------------------------------------
	if( ::isalpha( c ) || c == '_' ) {
		do {
			m_sVal += (TCHAR)c;
			c = GetChar();
		} while( ::isalnum( c ) || c == '_' );
		m_peekc = c;
		if( m_bForceLower )
			m_sVal.MakeLower();
		return m_tType = TT_WORD;
	}

	// ---- character and string literals -------------------------------------
	if( c == '\'' || c == '"' ) {
		m_sVal = _T("");
		m_tType = c;		// the opening quote doubles as the token type
		m_peekc = ' ';		// the closing quote is consumed by the loop below
		int c2;
		while( (c = GetChar()) != EOF && c != m_tType && c != '\n' && c != '\r' ) {
			if( c == '\\' ) {	// escape sequence
				c = GetChar();
				if( c == EOF )
					break;		// input ends inside the escape: reported below
				switch( c ) {
				case 'a': c = 0x7;  break;
				case 'b': c = '\b'; break;
				case 'f': c = 0xC;  break;
				case 'n': c = '\n'; break;
				case 'r': c = '\r'; break;
				case 't': c = '\t'; break;
				case 'v': c = 0xb;  break;
				case '\\':
				case '\'':
				case '"':
				case '?':
					// These escapes stand for the character itself.
					break;
				case '0':
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
					// Octal escape of one to three digits.  A character that
					// does not belong to the escape is un-read so the main
					// loop handles it normally (closing quote, another
					// escape, line break, EOF or plain text).
					c = c - '0';
					c2 = GetChar();
					if( '0' <= c2 && c2 <= '7' ) {
						c = (c<<3) + (c2 - '0');
						c2 = GetChar();
						if( '0' <= c2 && c2 <= '7' )
							c = (c<<3) + (c2 - '0');
						else
							m_iChar--;
					} else
						m_iChar--;
					break;
				default:
					// warning: 'c' : unrecognized character escape sequence
					OutputErrMsg( "warning in line %d: '%c': unrecognized character escape sequence",
						m_iLineNo, c );
				}
			}
			m_sVal += (TCHAR)c;
		}
		if( c == EOF ) {
			// error msg: the closing quote is missing
			OutputErrMsg( "error in line %d: syntax error, missing '%c'", m_iLineNo, m_tType );
			m_peekc = EOF;	// the next call must report TT_EOF
		} else if( c == '\r' || c == '\n' ) {
			// error msg: syntax error in line %d: new line in constant
			OutputErrMsg( "error in line %d: syntax error, new line in constant", m_iLineNo );
			m_peekc = c;	// keep the line break so it is counted / reported
		}

		return m_tType;
	}

	// ---- comments: "//" or "/*...*/" ---------------------------------------
	if( c == '/' && ( m_bSlSlComments || m_bSlStComments ) ) {
		c = GetChar();
		if( c == '*' && m_bSlStComments ) {
			// Skip until "*/", still counting the lines inside the comment.
			int prevc = 0;
			while( (c = GetChar()) != '/' || prevc != '*' ) {
				if( c == '\n' )	m_iLineNo++;
				if( c == EOF ) {
					m_peekc = EOF;
					return m_tType = TT_EOF;
				}
				prevc = c;
			}
			m_peekc = ' ';
			return NextToken();
		}
		if( c == '/' && m_bSlSlComments ) {
			// Skip to the end of the line.  The EOF test matters when the
			// last line of the input is a comment without a line break.
			do {
				c = GetChar();
			} while( c != '\n' && c != '\r' && c != EOF );
			m_peekc = c;
			return NextToken();
		}
		// Just a division sign.
		m_peekc = c;
		return m_tType = '/';
	}

	// ---- anything else is a single-character token --------------------------
	m_peekc = ' ';
	return m_tType = c;
}

int CTokenizer::LineNo()
{
	return m_iLineNo;
}

// Returns a printable text for the current token (the one whose type is in
// m_tType):
//   TT_EOF / TT_EOL      -> "EOF" / "EOL"
//   TT_WORD / TT_STRING  -> the token text held in m_sVal
//   TT_INTEGER           -> the value printed as a whole number
//   TT_REAL              -> the value printed with "%g"
//   anything else        -> the token's character in single quotes
CString CTokenizer::GetStrValue()
{
	CString ret;
	switch (m_tType)
	{
	case TT_EOF:
		ret = "EOF";
		break;
	case TT_EOL:
		ret = "EOL";
		break;
	case TT_WORD:
		ret = m_sVal;
		break;
	case TT_STRING:
		ret = m_sVal;
		break;
	case TT_INTEGER:
		// "%g" keeps only six significant digits and switches to exponent
		// notation for larger values (1234567 -> "1.23457e+006"), which
		// corrupts integer literals.  "%.0f" prints every digit.
		ret.Format("%.0f",m_dVal);
		break;
	case TT_REAL:
		ret.Format("%g",m_dVal);
		break;
	default:
		ret.Format ( "\'%c\'",(char)m_tType) ;
	}
	return ret;
}

double CTokenizer::GetDoubleNumValue()
{
	return m_dVal;
}

// Returns the stored numeric value converted to int; any fractional part is
// discarded by the conversion.
int CTokenizer::GetIntNumValue()
{
	const int truncated = static_cast<int>(m_dVal);
	return truncated;
}